This is a Rmd file analyzing our raw count data by the glmmSeq package as described in the vignette and manual.

sessionInfo() #provides list of loaded packages and version of R. I still have version 4.1 for now.
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.31   R6_2.5.1        jsonlite_1.8.4  evaluate_0.20  
##  [5] cachem_1.0.7    rlang_1.1.0     cli_3.6.1       rstudioapi_0.13
##  [9] jquerylib_0.1.4 bslib_0.4.2     rmarkdown_2.21  tools_4.1.2    
## [13] xfun_0.39       yaml_2.3.7      fastmap_1.1.1   compiler_4.1.2 
## [17] htmltools_0.5.5 knitr_1.42      sass_0.4.5
if ("rtracklayer" %in% rownames(installed.packages()) == 'FALSE') BiocManager::install("rtracklayer")
if ("goseq" %in% rownames(installed.packages()) == 'FALSE') BiocManager::install('goseq') 
if ("rrvgo" %in% rownames(installed.packages()) == 'FALSE') BiocManager::install("rrvgo")
if ("GO.db" %in% rownames(installed.packages()) == 'FALSE') BiocManager::install("GO.db")
#BiocManager::install("org.Ce.eg.db", force=TRUE) #install if needed

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## 
library(rtracklayer)
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:geneLenDataBase':
## 
##     unfactor
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## Loading required package: GenomeInfoDb
library(rrvgo)
library(GO.db)
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
library(forcats)
sessionInfo() #list of packages after library-ing these packages
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] forcats_0.5.1          tidyr_1.3.0            GO.db_3.14.0          
##  [4] AnnotationDbi_1.56.2   Biobase_2.54.0         rrvgo_1.6.0           
##  [7] rtracklayer_1.54.0     GenomicRanges_1.46.1   GenomeInfoDb_1.30.0   
## [10] IRanges_2.28.0         S4Vectors_0.32.3       BiocGenerics_0.40.0   
## [13] goseq_1.46.0           geneLenDataBase_1.30.0 BiasedUrn_2.0.9       
## [16] ggplot2_3.4.2          dplyr_1.1.2           
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.1-0            rjson_0.2.20               
##   [3] ellipsis_0.3.2              XVector_0.34.0             
##   [5] rstudioapi_0.13             ggrepel_0.9.3              
##   [7] bit64_4.0.5                 fansi_1.0.4                
##   [9] xml2_1.3.3                  splines_4.1.2              
##  [11] cachem_1.0.7                GOSemSim_2.20.0            
##  [13] knitr_1.42                  jsonlite_1.8.4             
##  [15] Rsamtools_2.10.0            gridBase_0.4-7             
##  [17] dbplyr_2.1.1                png_0.1-7                  
##  [19] pheatmap_1.0.12             shiny_1.7.1                
##  [21] compiler_4.1.2              httr_1.4.5                 
##  [23] assertthat_0.2.1            Matrix_1.4-0               
##  [25] fastmap_1.1.1               cli_3.6.1                  
##  [27] later_1.3.0                 htmltools_0.5.5            
##  [29] prettyunits_1.1.1           tools_4.1.2                
##  [31] igraph_1.4.2                NLP_0.2-1                  
##  [33] gtable_0.3.3                glue_1.6.2                 
##  [35] GenomeInfoDbData_1.2.7      rappdirs_0.3.3             
##  [37] Rcpp_1.0.10                 slam_0.1-50                
##  [39] jquerylib_0.1.4             vctrs_0.6.2                
##  [41] Biostrings_2.62.0           nlme_3.1-153               
##  [43] xfun_0.39                   stringr_1.5.0              
##  [45] mime_0.12                   lifecycle_1.0.3            
##  [47] restfulr_0.0.15             XML_3.99-0.8               
##  [49] zlibbioc_1.40.0             scales_1.2.1               
##  [51] treemap_2.4-3               hms_1.1.1                  
##  [53] promises_1.2.0.1            MatrixGenerics_1.6.0       
##  [55] parallel_4.1.2              SummarizedExperiment_1.24.0
##  [57] RColorBrewer_1.1-3          yaml_2.3.7                 
##  [59] curl_5.0.0                  memoise_2.0.1              
##  [61] sass_0.4.5                  biomaRt_2.50.3             
##  [63] stringi_1.7.12              RSQLite_2.2.9              
##  [65] BiocIO_1.4.0                GenomicFeatures_1.46.5     
##  [67] filelock_1.0.2              BiocParallel_1.28.3        
##  [69] rlang_1.1.0                 pkgconfig_2.0.3            
##  [71] matrixStats_0.61.0          bitops_1.0-7               
##  [73] evaluate_0.20               lattice_0.20-45            
##  [75] purrr_1.0.1                 GenomicAlignments_1.30.0   
##  [77] bit_4.0.4                   tidyselect_1.2.0           
##  [79] magrittr_2.0.3              R6_2.5.1                   
##  [81] generics_0.1.3              DelayedArray_0.20.0        
##  [83] DBI_1.1.2                   pillar_1.9.0               
##  [85] withr_2.5.0                 mgcv_1.8-38                
##  [87] KEGGREST_1.34.0             RCurl_1.98-1.5             
##  [89] tibble_3.2.1                crayon_1.5.2               
##  [91] wordcloud_2.6               utf8_1.2.3                 
##  [93] BiocFileCache_2.2.1         rmarkdown_2.21             
##  [95] progress_1.2.2              grid_4.1.2                 
##  [97] data.table_1.14.8           blob_1.2.2                 
##  [99] digest_0.6.31               xtable_1.8-4               
## [101] tm_0.7-11                   httpuv_1.6.4               
## [103] munsell_0.5.0               bslib_0.4.2

2631 genes are frontloaded, with a yall ratio larger than 1 and a xall_1 ratio less than 1.

DEGs <- read.csv(file="../../../output/Slope_Base/signif_genes_normcts.csv", sep=',', header=TRUE)  %>% dplyr::select(!c('X'))

#NOTE! This is not a file only with differentially expressed genes, this contains all of the genes in our dataset but also contains p-value information and fold change information to help determine which genes are signficant DEGs based on our model in glmmSeq

rownames(DEGs) <- DEGs$Gene

dim(DEGs)
## [1] 9011   75
genes <- rownames(DEGs)

Generate files for Functional Enrichment

Based off of Ariana’s script

Functional annotation file obtained from: http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.EggNog_results.txt.gz on April 3, 2023.

Pacuta.annot <- read.delim("../../../data/Pocillopora_acuta_HIv2.genes.EggNog_results.txt") %>% dplyr::rename("query" = X.query)

dim(Pacuta.annot)
## [1] 16784    21
head(Pacuta.annot)
##                                        query         seed_ortholog    evalue
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a        45351.EDO48725 2.16e-120
## 2  Pocillopora_acuta_HIv2___RNAseq.g18333.t1        45351.EDO38694 3.18e-123
## 3   Pocillopora_acuta_HIv2___RNAseq.g7985.t1 192875.XP_004345759.1 1.70e-183
## 4  Pocillopora_acuta_HIv2___RNAseq.g13511.t1        45351.EDO28722  2.94e-48
## 5      Pocillopora_acuta_HIv2___TS.g15308.t1  10224.XP_006813307.1  3.19e-20
## 6   Pocillopora_acuta_HIv2___RNAseq.g2057.t1 106582.XP_004573970.1  3.70e-14
##   score
## 1 364.0
## 2 355.0
## 3 526.0
## 4 172.0
## 5  92.4
## 6  68.2
##                                                                                                                                                                 eggNOG_OGs
## 1                                                                                       COG0620@1|root,KOG2263@2759|Eukaryota,38GZS@33154|Opisthokonta,3BNKS@33208|Metazoa
## 2                                                                                       COG0450@1|root,KOG0852@2759|Eukaryota,38B9P@33154|Opisthokonta,3BGS4@33208|Metazoa
## 3                                                                                                           COG3239@1|root,KOG4232@2759|Eukaryota,39UMW@33154|Opisthokonta
## 4                                                                                           2ED36@1|root,2SITK@2759|Eukaryota,39IIK@33154|Opisthokonta,3C3PY@33208|Metazoa
## 5                                                                                       COG2801@1|root,KOG0017@2759|Eukaryota,38F42@33154|Opisthokonta,3BA5H@33208|Metazoa
## 6 2CSTD@1|root,2S4A7@2759|Eukaryota,3A79X@33154|Opisthokonta,3BT5E@33208|Metazoa,3D9B1@33213|Bilateria,48FVM@7711|Chordata,49CYZ@7742|Vertebrata,4A886@7898|Actinopterygii
##        max_annot_lvl COG_category
## 1      33208|Metazoa            E
## 2      33208|Metazoa            O
## 3 33154|Opisthokonta            I
## 4      33208|Metazoa            -
## 5      33208|Metazoa           OU
## 6      33208|Metazoa            S
##                                                      Description Preferred_name
## 1               Cobalamin-independent synthase, Catalytic domain              -
## 2            negative regulation of male germ cell proliferation          PRDX4
## 3                                          Fatty acid desaturase              -
## 4                                                              -              -
## 5                                                   K02A2.6-like              -
## 6 Phosphatidylinositol N-acetylglucosaminyltransferase subunit Y           PIGY
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         GOs
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
## 2 GO:0000003,GO:0001775,GO:0002252,GO:0002263,GO:0002274,GO:0002275,GO:0002283,GO:0002366,GO:0002376,GO:0002443,GO:0002444,GO:0002446,GO:0003006,GO:0003674,GO:0003824,GO:0004601,GO:0005488,GO:0005515,GO:0005575,GO:0005576,GO:0005615,GO:0005622,GO:0005623,GO:0005737,GO:0005783,GO:0005790,GO:0005829,GO:0006082,GO:0006457,GO:0006464,GO:0006468,GO:0006520,GO:0006575,GO:0006793,GO:0006796,GO:0006807,GO:0006810,GO:0006887,GO:0006915,GO:0006950,GO:0006952,GO:0006955,GO:0006979,GO:0007154,GO:0007165,GO:0007249,GO:0007252,GO:0007275,GO:0007276,GO:0007283,GO:0007548,GO:0008150,GO:0008152,GO:0008219,GO:0008285,GO:0008379,GO:0008406,GO:0008584,GO:0009056,GO:0009266,GO:0009409,GO:0009605,GO:0009607,GO:0009617,GO:0009628,GO:0009636,GO:0009893,GO:0009966,GO:0009967,GO:0009987,GO:0010467,GO:0010604,GO:0010646,GO:0010647,GO:0010941,GO:0010942,GO:0010950,GO:0010952,GO:0012501,GO:0012505,GO:0016043,GO:0016192,GO:0016209,GO:0016310,GO:0016491,GO:0016684,GO:0016999,GO:0017001,GO:0017144,GO:0019222,GO:0019471,GO:0019538,GO:0019725,GO:0019752,GO:0019953,GO:0022414,GO:0022417,GO:0023051,GO:0023052,GO:0023056,GO:0030141,GO:0030162,GO:0030198,GO:0031323,GO:0031325,GO:0031410,GO:0031974,GO:0031982,GO:0031983,GO:0032268,GO:0032270,GO:0032501,GO:0032502,GO:0032504,GO:0032940,GO:0033554,GO:0034774,GO:0035556,GO:0036211,GO:0036230,GO:0042119,GO:0042127,GO:0042221,GO:0042592,GO:0042737,GO:0042742,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0042981,GO:0043062,GO:0043065,GO:0043067,GO:0043068,GO:0043085,GO:0043170,GO:0043207,GO:0043226,GO:0043227,GO:0043229,GO:0043231,GO:0043233,GO:0043280,GO:0043281,GO:0043299,GO:0043312,GO:0043412,GO:0043436,GO:0043900,GO:0043901,GO:0044093,GO:0044237,GO:0044238,GO:0044248,GO:0044260,GO:0044267,GO:0044281,GO:0044421,GO:0044422,GO:0044424,GO:0044433,GO:0044444,GO:0044446,GO:0044464,GO:0044703,GO:0045055,GO:0045137,GO:0045321,GO:0045454,GO:0045862,GO:0046425,GO:0046427,GO:0046546,GO:0046661,GO:0046903,GO:0046983,GO:0048232,GO:0048513,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048608,GO:0048609,GO:0048731,GO:0048856,GO:0050789,GO:0050790,GO:0050794,GO:0050896,GO:0051171,GO:0051173,GO:0051179,GO:0051186,GO:0051187,GO:0051234,GO:0051239,GO:0051241,GO:0051246,GO:0051247,GO:0051336,GO:0051345,GO:0051604,GO:0051704,GO:0051707,GO:0051716,GO:0051920,GO:0052547,GO:0052548,GO:0055114,GO:0060205,GO:0060255,GO:0061458,GO:0065007,GO:0065008,GO:0065009,GO:0070013,GO:0070417,GO:0070887,GO:0071704,GO:0071840,GO:0072593,GO:0080090,GO:0097190,GO:0097237,GO:0097708,GO:0098542,GO:0098754,GO:0098869,GO:0099503,GO:0101002,GO:1901564,GO:1901605,GO:1902531,GO:1902533,GO:1904813,GO:1904892,GO:1904894,GO:1905936,GO:1905937,GO:1990748,GO:2000116,GO:2000241,GO:2000242,GO:2000254,GO:2000255,GO:2001056,GO:2001233,GO:2001235,GO:2001267,GO:2001269
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 GO:0000506,GO:0005575,GO:0005622,GO:0005623,GO:0005737,GO:0005783,GO:0005789,GO:0005886,GO:0006464,GO:0006497,GO:0006505,GO:0006506,GO:0006629,GO:0006643,GO:0006644,GO:0006650,GO:0006661,GO:0006664,GO:0006793,GO:0006796,GO:0006807,GO:0008150,GO:0008152,GO:0008610,GO:0008654,GO:0009058,GO:0009059,GO:0009247,GO:0009987,GO:0012505,GO:0016020,GO:0016254,GO:0019538,GO:0019637,GO:0031984,GO:0032991,GO:0034645,GO:0036211,GO:0042157,GO:0042158,GO:0042175,GO:0043170,GO:0043226,GO:0043227,GO:0043229,GO:0043231,GO:0043412,GO:0044237,GO:0044238,GO:0044249,GO:0044255,GO:0044260,GO:0044267,GO:0044422,GO:0044424,GO:0044425,GO:0044432,GO:0044444,GO:0044446,GO:0044464,GO:0045017,GO:0046467,GO:0046474,GO:0046486,GO:0046488,GO:0071704,GO:0071944,GO:0090407,GO:0098796,GO:0098827,GO:1901135,GO:1901137,GO:1901564,GO:1901566,GO:1901576,GO:1902494,GO:1903509,GO:1990234
##           EC   KEGG_ko
## 1   2.1.1.14 ko:K00549
## 2  1.11.1.15 ko:K03386
## 3 1.14.19.31 ko:K12418
## 4          -         -
## 5          -         -
## 6          - ko:K11001
##                                                                           KEGG_Pathway
## 1 ko00270,ko00450,ko01100,ko01110,ko01230,map00270,map00450,map01100,map01110,map01230
## 2                                                                     ko04214,map04214
## 3                                                                                    -
## 4                                                                                    -
## 5                                                                                    -
## 6                                                    ko00563,ko01100,map00563,map01100
##   KEGG_Module KEGG_Reaction             KEGG_rclass
## 1      M00017 R04405,R09365 RC00035,RC00113,RC01241
## 2           -             -                       -
## 3           -             -                       -
## 4           -             -                       -
## 5           -             -                       -
## 6           -        R05916                       -
##                             BRITE KEGG_TC CAZy BiGG_Reaction
## 1 ko00000,ko00001,ko00002,ko01000       -    -             -
## 2 ko00000,ko00001,ko01000,ko04147       -    -             -
## 3         ko00000,ko01000,ko01004       -    -             -
## 4                               -       -    -             -
## 5                               -       -    -             -
## 6                 ko00000,ko00001       -    -             -
##                  PFAMs
## 1          Meth_synt_2
## 2  1-cysPrx_C,AhpC-TSA
## 3 Cyt-b5,FA_desaturase
## 4                    -
## 5            RVT_1,rve
## 6                PIG-Y
genes2annot = match(genes, Pacuta.annot$query) #match genes in DEGs (all genes after filtering) to genes in annotation file

sum(is.na(genes2annot)) #number of genes without EggNog annotation
## [1] 2812
missing<-as.data.frame(genes[which(is.na(genes2annot))]) #dataframe of genes without EggNog annotation
head(missing)
##            genes[which(is.na(genes2annot))]
## 1 Pocillopora_acuta_HIv2___RNAseq.g7479.t3a
## 2 Pocillopora_acuta_HIv2___RNAseq.g7479.t3b
## 3   Pocillopora_acuta_HIv2___RNAseq.g682.t1
## 4  Pocillopora_acuta_HIv2___RNAseq.g4805.t1
## 5 Pocillopora_acuta_HIv2___RNAseq.g11061.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g16217.t1

2812/9011 genes without eggnog annotation

names(Pacuta.annot)
##  [1] "query"          "seed_ortholog"  "evalue"         "score"         
##  [5] "eggNOG_OGs"     "max_annot_lvl"  "COG_category"   "Description"   
##  [9] "Preferred_name" "GOs"            "EC"             "KEGG_ko"       
## [13] "KEGG_Pathway"   "KEGG_Module"    "KEGG_Reaction"  "KEGG_rclass"   
## [17] "BRITE"          "KEGG_TC"        "CAZy"           "BiGG_Reaction" 
## [21] "PFAMs"
geneInfo0 = data.frame(gene_id = genes, #add gene id
  Accession = Pacuta.annot$seed_ortholog[genes2annot], #add accession number
  Bitscore = Pacuta.annot$score[genes2annot], #add bitscore
  eValue = Pacuta.annot$evalue[genes2annot], #add e value
  Description = Pacuta.annot$Description[genes2annot], #add description of gene
  Annotation.GO.ID = Pacuta.annot$GOs[genes2annot], #add GO ID's
  q_Origin = DEGs$Origin, #add Origin adjusted p-value
  q_Treatment = DEGs$Treatment, #add Treatment adjusted p-value
  q_Interaction = DEGs$Treatment.Origin, #add Treatment:Origin adjusted p-value
  Stable_OriginFC = DEGs$Stable_OriginFC, #add fold change for Slope vs Flat in the stable treatment
  Variable_OriginFC = DEGs$Variable_OriginFC, #add fold change for Slope vs Flat in the variable treatment
  maxGroupFC = DEGs$maxGroupFC, #add max group fold change (was FC bigger in stable of variable treatment)
  col = DEGs$col) #add qualitative significance info

dim(geneInfo0)
## [1] 9011   13
head(geneInfo0,2)
##                                     gene_id      Accession Bitscore    eValue
## 1 Pocillopora_acuta_HIv2___RNAseq.g27841.t1 45351.EDO35402      566 4.00e-197
## 2 Pocillopora_acuta_HIv2___RNAseq.g14011.t1 45351.EDO48592      449 1.17e-151
##                                                 Description
## 1 cyclin-dependent protein serine/threonine kinase activity
## 2          TAF6-like RNA polymerase II, p300 CBP-associated
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Annotation.GO.ID
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                               GO:0000003,GO:0003006,GO:0003674,GO:0003824,GO:0004672,GO:0004674,GO:0004693,GO:0005575,GO:0005622,GO:0005623,GO:0005634,GO:0005737,GO:0005813,GO:0005815,GO:0005856,GO:0006464,GO:0006468,GO:0006793,GO:0006796,GO:0006807,GO:0007154,GO:0007165,GO:0007548,GO:0008150,GO:0008152,GO:0009987,GO:0015630,GO:0016301,GO:0016310,GO:0016740,GO:0016772,GO:0016773,GO:0019538,GO:0022414,GO:0023052,GO:0032502,GO:0036211,GO:0043170,GO:0043226,GO:0043227,GO:0043228,GO:0043229,GO:0043231,GO:0043232,GO:0043412,GO:0044237,GO:0044238,GO:0044260,GO:0044267,GO:0044422,GO:0044424,GO:0044430,GO:0044446,GO:0044464,GO:0050789,GO:0050794,GO:0050896,GO:0051716,GO:0051726,GO:0065007,GO:0071704,GO:0097472,GO:0140096,GO:1901564
## 2 GO:0000118,GO:0000123,GO:0003674,GO:0003676,GO:0003677,GO:0003712,GO:0003713,GO:0005488,GO:0005575,GO:0005622,GO:0005623,GO:0005634,GO:0005654,GO:0005730,GO:0006325,GO:0006338,GO:0006355,GO:0006357,GO:0006464,GO:0006473,GO:0006475,GO:0006807,GO:0006996,GO:0008150,GO:0008152,GO:0009889,GO:0009891,GO:0009893,GO:0009987,GO:0010468,GO:0010556,GO:0010557,GO:0010604,GO:0016043,GO:0016569,GO:0016570,GO:0016573,GO:0016604,GO:0016607,GO:0018193,GO:0018205,GO:0018393,GO:0018394,GO:0019219,GO:0019222,GO:0019538,GO:0030914,GO:0031248,GO:0031323,GO:0031325,GO:0031326,GO:0031328,GO:0031974,GO:0031981,GO:0032991,GO:0036211,GO:0043170,GO:0043226,GO:0043227,GO:0043228,GO:0043229,GO:0043231,GO:0043232,GO:0043233,GO:0043412,GO:0043543,GO:0043966,GO:0044237,GO:0044238,GO:0044260,GO:0044267,GO:0044422,GO:0044424,GO:0044428,GO:0044446,GO:0044451,GO:0044464,GO:0045935,GO:0048518,GO:0048522,GO:0050789,GO:0050794,GO:0051171,GO:0051173,GO:0051252,GO:0051254,GO:0051276,GO:0060255,GO:0065007,GO:0070013,GO:0070461,GO:0071704,GO:0071840,GO:0080090,GO:0097159,GO:0140110,GO:1901363,GO:1901564,GO:1902493,GO:1902494,GO:1902680,GO:1903506,GO:1903508,GO:1990234,GO:2000112,GO:2001141
##    q_Origin q_Treatment q_Interaction Stable_OriginFC Variable_OriginFC
## 1 0.3325688           1     0.9994279      -0.2177817        -0.1121968
## 2 0.1514413           1     0.9994279       0.6976765         0.2428565
##   maxGroupFC             col
## 1     Stable Not Significant
## 2     Stable Not Significant

Add KEGG annotation information. Downloaded from: http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.KEGG_results.txt.gz on April 3, 2023.

kegg<-read.table("../../../data/Pocillopora_acuta_HIv2.genes.KEGG_results.txt", sep="", quote="", na.strings=c("","NA"), blank.lines.skip = FALSE, header=FALSE)

dim(kegg)
## [1] 33730     2
head(kegg)
##                                           V1     V2
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a   <NA>
## 2 Pocillopora_acuta_HIv2___RNAseq.g24143.t1b K22584
## 3  Pocillopora_acuta_HIv2___RNAseq.g22918.t1   <NA>
## 4  Pocillopora_acuta_HIv2___RNAseq.g18333.t1 K03386
## 5   Pocillopora_acuta_HIv2___RNAseq.g7985.t1   <NA>
## 6  Pocillopora_acuta_HIv2___RNAseq.g13511.t1   <NA>

Add KEGG annotations to each gene.

geneInfo0$KEGG <- kegg$V2[match(geneInfo0$gene_id, kegg$V1)]

Order geneInfo0 by significance of Origin, q_Origin (adjusted p value)

geneInfo <- geneInfo0[order(geneInfo0[, 'q_Origin']), ]

write.csv(geneInfo, file = "../../../output/Slope_Base/geneInfo.csv") #gene info for reference/supplement

Now onto Enrichment!

#geneInfo<-read.csv("../../../output/Slope_Base/geneInfo.csv")

dim(geneInfo)
## [1] 9011   14

Get gene length information.

#import file
gff <- rtracklayer::import("../../../data/Pocillopora_acuta_HIv2.genes_fixed.gff3") #if this doesn't work, restart R and try again 
transcripts <- subset(gff, type == "transcript") #keep only transcripts , not CDS or exons
transcript_lengths <- width(transcripts) #isolate length of each gene
seqnames<-transcripts$ID #extract list of gene id 
lengths<-cbind(seqnames, transcript_lengths)
lengths<-as.data.frame(lengths) #convert to data frame

geneInfo$Length<-lengths$transcript_lengths[match(geneInfo$gene_id, lengths$seqnames)] #Add in length to geneInfo

Format GO terms to remove dashes and quotes and separate by semicolons (replace , with ;) in Annotation.GO.ID column

geneInfo$Annotation.GO.ID <- gsub(",", ";", geneInfo$Annotation.GO.ID)
geneInfo$Annotation.GO.ID <- gsub('"', "", geneInfo$Annotation.GO.ID)
geneInfo$Annotation.GO.ID <- gsub("-", NA, geneInfo$Annotation.GO.ID)
### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)

### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)


#### THIS IS WHERE THE FRONTLOADING NEEDS TO COME IN
#2631 genes are frontloaded, with a yall ratio larger than 1 and a xall_1 ratio less than 1.


FRONTs <- read.csv(file="../../../output/Slope_Base/frontloaded_genes.csv", sep=',', header=TRUE)  %>% dplyr::select(!c('X'))

### Generate vector with names of the 2631 frontloaded genes
ID.vector <- geneInfo %>%
  filter(gene_id %in% FRONTs$Gene) %>%
  pull(gene_id)

##Get a list of GO Terms for the 2631 frontloaded genes
GO.terms <- geneInfo %>%
  filter(gene_id %in% FRONTs$Gene) %>%
  dplyr::select(gene_id, Annotation.GO.ID)

##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$Annotation.GO.ID), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "Annotation.GO.ID")
GO.terms <- split2

##Construct list of genes with 1 for genes in that are frontloaded and 0 for those that are not
gene.vector = as.integer(ALL.vector %in% ID.vector) 

names(gene.vector) <- ALL.vector #set names

#weight gene vector by bias for length of gene
pwf <- nullp(gene.vector, bias.data = LENGTH.vector)
## Warning in pcls(G): initial point very close to some inequality constraints

#run goseq using Wallenius method for all categories of GO terms 
GO.wall <- goseq(pwf, gene2cat=GO.terms, method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]

colnames(GO)[1] <- "GOterm"
    
#adjust p-values 
GO$bh_adjust <-  p.adjust(GO$over_represented_pvalue, method="BH") #add adjusted p-values

#Write file of results 
write.csv(GO, "../../../output/Slope_Base/Frontloaded_GOSeq/GOseq_Frontloaded.csv")

#Filtering for p < 0.05
GO_05 <- GO %>%
        dplyr::filter(bh_adjust<0.05) %>%
        dplyr::arrange(., ontology, bh_adjust)

#Filtering for p < 0.05
GO_001 <- GO %>%
        dplyr::filter(bh_adjust<0.001) %>%
        dplyr::arrange(., ontology, bh_adjust)

#Filtering for p < 0.05
GO_052 <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, bh_adjust)

#Filtering for p < 0.05
GO_0012 <- GO %>%
        dplyr::filter(over_represented_pvalue<0.001) %>%
        dplyr::arrange(., ontology, bh_adjust)

Plotting!

ontologies<-c("BP", "CC", "MF")

for (category in ontologies) {
    
    go_results <- GO_05
    
    go_results<-go_results%>%
      filter(ontology==category)%>%
      filter(bh_adjust != "NA") %>%
      filter(numInCat>10)%>%
      arrange(., bh_adjust)
    
      #Reduce/collapse GO term set with the rrvgo package 
      simMatrix <- calculateSimMatrix(go_results$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont=category,
                                method="Rel")
    #calculate similarity 
    scores <- setNames(-log(go_results$bh_adjust), go_results$GOterm)
    reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
    
    #keep only the goterms from the reduced list
    go_results<-go_results%>%
      filter(GOterm %in% reducedTerms$go)
    
    #add in parent terms to list of go terms 
    go_results$ParentTerm<-reducedTerms$parentTerm[match(go_results$GOterm, reducedTerms$go)]
   
    #plot significantly enriched GO terms by Slim Category faceted by slim term 
  GO.plot <-  ggplot2::ggplot(go_results, aes(x = ontology, y = term)) + 
    geom_point(aes(size=bh_adjust)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
  
  print(GO.plot)
  
ggsave(filename=paste0("../../../output/Slope_Base/Frontloaded_GOSeq/FRONTs_P05", "_", category, ".png"), plot=GO.plot, dpi=100, width=12, height=100, units="in", limitsize=FALSE)
}
## 
## preparing gene to GO mapping data...
## preparing IC data...
## preparing gene to GO mapping data...
## preparing IC data...

## preparing gene to GO mapping data...
## preparing IC data...

Combining MF, CC, and BP into one plot and order by pvalue, based on Jill’s script

GO_plot_all <- GO_001 %>% drop_na(ontology) %>% mutate(term = fct_reorder(term, numDEInCat)) %>%
  mutate(term = fct_reorder(term, ontology)) %>%
  ggplot( aes(x=term, y=numDEInCat) ) +
  geom_segment( aes(x=term ,xend=term, y=0, yend=numDEInCat), color="grey") +
  #geom_text(aes(label = over_represented_pvalue), hjust = -1, vjust = 0, size = 2) +
  geom_point(size=1, aes(colour = ontology)) +
  coord_flip() +
  ylim(0,305) +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position="bottom"
  ) +
  xlab("") +
  ylab("") +
  theme_bw() + #Set background color 
  theme(panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background #set title attributes

GO_plot_all
## Warning: Removed 108 rows containing missing values (`geom_segment()`).
## Warning: Removed 108 rows containing missing values (`geom_point()`).

ggsave("../../../output/Slope_Base/Frontloaded_GOSeq/FRONTs_P001.pdf", GO_plot_all, width = 40, height = 500, units = c("in"), dpi=100, limitsize=FALSE)
## Warning: Removed 108 rows containing missing values (`geom_segment()`).
## Removed 108 rows containing missing values (`geom_point()`).
#ordered by p-value
GO_plot_all_pval <- GO_001 %>% drop_na(ontology) %>% mutate(term = fct_reorder(term, over_represented_pvalue)) %>%
  mutate(term = fct_reorder(term, ontology)) %>%
  ggplot( aes(x=term, y=numDEInCat) ) +
  geom_segment( aes(x=term ,xend=term, y=0, yend=numDEInCat), color="grey") +
  #geom_text(aes(label = over_represented_pvalue), hjust = -1, vjust = 0, size = 2) +
  geom_point(size=1, aes(colour = ontology)) +
  coord_flip() +
  ylim(0,305) +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position="bottom"
  ) +
  xlab("") +
  ylab("") +
  theme_bw() + #Set background color 
  theme(panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background #set title attributes

GO_plot_all_pval
## Warning: Removed 108 rows containing missing values (`geom_segment()`).
## Removed 108 rows containing missing values (`geom_point()`).

ggsave("../../../output/Slope_Base/Frontloaded_GOSeq/FRONTs_P001_orderedpval.pdf", GO_plot_all_pval, width = 40, height = 500, units = c("in"), dpi=100, limitsize=FALSE)
## Warning: Removed 108 rows containing missing values (`geom_segment()`).
## Removed 108 rows containing missing values (`geom_point()`).

KEGG enrichment

##Get a list of KEGG Terms for the 2631 frontloaded genes
KO.terms <- geneInfo %>%
  filter(gene_id %in% FRONTs$Gene) %>%
  dplyr::select(gene_id, KEGG)


#run goseq using Wallenius method for all categories of KEGG terms
KO.wall <-
  goseq(
    pwf,
    ID.vector,
    gene2cat = KO.terms,
    test.cats = c("KEGG"),
    method = "Wallenius",
    use_genes_without_cat = TRUE
  )
## Using manually entered categories.
## Calculating the p-values...
KO <- KO.wall[order(KO.wall$over_represented_pvalue), ]
colnames(KO)[1] <- "KEGGterm"

#adjust p-values
#KO$bh_adjust <- p.adjust(KO$over_represented_pvalue, method = "BH") #add adjusted p-values

#Filtering for p < 0.05
KO_05 <- KO %>%
  dplyr::filter(over_represented_pvalue < 0.05) %>%
  dplyr::arrange(., over_represented_pvalue)

head(KO_05)
##   KEGGterm over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 1   K20478            0.0002931040                        1          7        7
## 2   K04988            0.0009849526                        1          6        6
## 3   K06238            0.0106109555                        1          4        4
## 4   K22754            0.0106109555                        1          4        4
## 5   K11997            0.0149770737                        1          3        3
## 6   K02183            0.0198760162                        1          3        3
#Write file of results 
write.csv(KO, "../../../output/Slope_Base/Frontloaded_GOSeq/KEGG_Frontloaded.csv")

If using non-adjusted p-value, we get 6 significant KEGG terms:

Alt pipeline? Using all GO terms from all 9011 genes as input (instead of testing enrichment of GO terms from a list of frontloaded GO terms???????)

#GO.terms_alt

##Get a list of GO Terms for ALL 9011 genes
GO.terms_alt <- geneInfo %>%
  #filter(gene_id %in% FRONTs$Gene) %>%
  dplyr::select(gene_id, Annotation.GO.ID)

##Format to have one goterm per row with gene ID repeated
split_alt <- strsplit(as.character(GO.terms_alt$Annotation.GO.ID), ";")
split2_alt <- data.frame(v1 = rep.int(GO.terms_alt$gene, sapply(split_alt, length)), v2 = unlist(split_alt))
colnames(split2_alt) <- c("gene", "Annotation.GO.ID")
GO.terms_alt <- split2_alt

goseq_res <-goseq(pwf, gene2cat=GO.terms_alt, method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
head(goseq_res)
##         category over_represented_pvalue under_represented_pvalue numDEInCat
## 11429 GO:0051130            0.0001040086                0.9999277        217
## 7222  GO:0032886            0.0001638763                0.9999213         56
## 9599  GO:0044294            0.0002918094                1.0000000          7
## 13103 GO:0070507            0.0003615731                0.9998351         45
## 9846  GO:0045165            0.0004901344                0.9997054         91
## 10889 GO:0048557            0.0005575978                0.9998684         17
##       numInCat                                                   term ontology
## 11429      580 positive regulation of cellular component organization       BP
## 7222       121                regulation of microtubule-based process       BP
## 9599         7                                  dendritic growth cone       CC
## 13103       95    regulation of microtubule cytoskeleton organization       BP
## 9846       226                                   cell fate commitment       BP
## 10889       27                embryonic digestive tract morphogenesis       BP
#adjusting the p-values here makes all of them p=1 ?????

GO <- goseq_res[order(goseq_res$over_represented_pvalue),]

colnames(GO)[1] <- "GOterm"
    
#adjust p-values 
#GO$bh_adjust <-  p.adjust(GO$over_represented_pvalue, method="BH") #add adjusted p-values

#Write file of results 
write.csv(GO, "../../../output/Slope_Base/Frontloaded_GOSeq/GOseq_Frontloaded_ALT.csv")

#Filtering for p < 0.05
GO_05_ALT <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue)

#Filtering for p < 0.05
GO_001_ALT <- GO %>%
        dplyr::filter(over_represented_pvalue<0.001) %>%
        dplyr::arrange(., ontology, over_represented_pvalue)

Plotting!

ontologies<-c("BP", "CC", "MF")

for (category in ontologies) {
    
    go_results <- GO_05_ALT
    
    go_results<-go_results%>%
      filter(ontology==category)%>%
      filter(over_represented_pvalue != "NA") %>%
      filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
    
      #Reduce/collapse GO term set with the rrvgo package 
      simMatrix <- calculateSimMatrix(go_results$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont=category,
                                method="Rel")
    #calculate similarity 
    scores <- setNames(-log(go_results$over_represented_pvalue), go_results$GOterm)
    reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
    
    #keep only the goterms from the reduced list
    go_results<-go_results%>%
      filter(GOterm %in% reducedTerms$go)
    
    #add in parent terms to list of go terms 
    go_results$ParentTerm<-reducedTerms$parentTerm[match(go_results$GOterm, reducedTerms$go)]
   
    #plot significantly enriched GO terms by Slim Category faceted by slim term 
  GO.plot <-  ggplot2::ggplot(go_results, aes(x = ontology, y = term)) + 
    geom_point(aes(size=over_represented_pvalue)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
  
  print(GO.plot)
  
ggsave(filename=paste0("../../../output/Slope_Base/Frontloaded_GOSeq/FRONTs_P05_ALT", "_", category, ".png"), plot=GO.plot, dpi=100, width=12, height=100, units="in", limitsize=FALSE)
}
## preparing gene to GO mapping data...
## preparing IC data...
## preparing gene to GO mapping data...
## preparing IC data...

## preparing gene to GO mapping data...
## preparing IC data...

KEGG enrichment

##Get a list of KEGG Terms for all genes
KO.terms <- geneInfo %>%
  #filter(gene_id %in% FRONTs$Gene) %>%
  dplyr::select(gene_id, KEGG)

#run goseq using Wallenius method for all categories of KEGG terms
KO.wall <-
  goseq(
    pwf,
    ID.vector,
    gene2cat = KO.terms,
    test.cats = c("KEGG"),
    method = "Wallenius",
    use_genes_without_cat = TRUE
  )
## Using manually entered categories.
## Calculating the p-values...
KO <- KO.wall[order(KO.wall$over_represented_pvalue), ]
colnames(KO)[1] <- "KEGGterm"

#adjust p-values
#KO$bh_adjust <- p.adjust(KO$over_represented_pvalue, method = "BH") #add adjusted p-values

#Filtering for p < 0.05
KO_05 <- KO %>%
  dplyr::filter(over_represented_pvalue < 0.05) %>%
  dplyr::arrange(., over_represented_pvalue)

head(KO_05)
##   KEGGterm over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 1   K17341              0.03305436                        1          3        3
#Write file of results 
write.csv(KO, "../../../output/Slope_Base/Frontloaded_GOSeq/KEGG_Frontloaded_ALT.csv")

If using non-adjusted p-value, we get one significant KEGG term: K17341 (HMCN) hemicentin, Age-related macular degeneration